#### Produce the distributed lags figure and table

### Prepare environment

setwd("~/research/coffee-dataverse/src")

library(ggplot2)
library(dplyr)

do.origspec <- T
do.allyears <- T
do.group <- 'all' # Alternatively, 'arabica' or 'robusta'

source("load.R", encoding="iso-8859-1")

### Prepare the data

if (do.group == 'all') {
    df$logproduce <- log(df$total.produce)
    df$logproduce[!is.finite(df$logproduce)] <- NA
    df$logharvest <- log(df$total.harvest)
    df$logharvest[!is.finite(df$logharvest)] <- NA
} else {
    if (do.group == 'arabica')
        munidf <- df %>% group_by(Munic�pio) %>% summarize(ingroup.mainly=mean(arabica.produce > total.produce / 2, na.rm=T))
    else
        munidf <- df %>% group_by(Munic�pio) %>% summarize(ingroup.mainly=mean(robusta.produce > total.produce / 2, na.rm=T))
    ingroup <- subset(munidf, ingroup.mainly > .5)$Munic�pio

    df$logproduce <- log(df$total.produce)
    df$logproduce[!is.finite(df$logproduce)] <- NA
    df$logproduce[!(df$Munic�pio %in% ingroup)] <- NA
    df$logharvest <- log(df$total.harvest)
    df$logharvest[!is.finite(df$logharvest)] <- NA
    df$logharvest[!(df$Munic�pio %in% ingroup)] <- NA
}

seldf <- df[, c('Munic�pio', 'year', 'state', 'tmin', 'tmax', 'prcp', 'prcp2', 'gdd1000', 'edd1000', 'logyield', 'logharvest', 'logproduce', 'longitude', 'latitude')]
seldf$year.p1 <- seldf$year + 1 # 2000 -> 2001
seldf$year.m1 <- seldf$year - 1
seldf$year.m2 <- seldf$year - 2
seldf$year.m3 <- seldf$year - 3
seldf$year.m4 <- seldf$year - 4
seldf$year.m5 <- seldf$year - 5
seldf$year.m6 <- seldf$year - 6

dldf <- seldf %>% left_join(seldf, by=c('Munic�pio', 'year.p1'='year'), suffix=c('', '.p1')) %>% left_join(seldf, by=c('Munic�pio', 'year.m1'='year'), suffix=c('', '.m1')) %>% left_join(seldf, by=c('Munic�pio', 'year.m2'='year'), suffix=c('', '.m2')) %>% left_join(seldf, by=c('Munic�pio', 'year.m3'='year'), suffix=c('', '.m3')) %>% left_join(seldf, by=c('Munic�pio', 'year.m4'='year'), suffix=c('', '.m4')) %>% left_join(seldf, by=c('Munic�pio', 'year.m5'='year'), suffix=c('', '.m5')) %>% left_join(seldf, by=c('Munic�pio', 'year.m6'='year'), suffix=c('', '.m6'))
dldf <- dldf[, -grep("year", names(dldf))[-1]]

library(lfe)
library(splines)

dldf$year2 <- dldf$year * dldf$year / 1000

dldf$logharvest.diff <- dldf$logharvest - dldf$logharvest.m1
dldf$logyield.diff <- dldf$logyield - dldf$logyield.m1
dldf$logproduce.diff <- dldf$logproduce - dldf$logproduce.m1
dldf$tmin.diff <- dldf$tmin - dldf$tmin.m1
dldf$gdd1000.diff <- dldf$gdd1000 - dldf$gdd1000.m1
dldf$edd1000.p1.diff <- dldf$edd1000.p1 - dldf$edd1000
dldf$edd1000.diff <- dldf$edd1000 - dldf$edd1000.m1
dldf$edd1000.m1.diff <- dldf$edd1000.m1 - dldf$edd1000.m2
dldf$edd1000.m2.diff <- dldf$edd1000.m2 - dldf$edd1000.m3
dldf$edd1000.m3.diff <- dldf$edd1000.m3 - dldf$edd1000.m4
dldf$edd1000.m4.diff <- dldf$edd1000.m4 - dldf$edd1000.m5
dldf$edd1000.m5.diff <- dldf$edd1000.m5 - dldf$edd1000.m6
dldf$prcp.diff <- dldf$prcp - dldf$prcp.m1
dldf$prcp2.diff <- dldf$prcp2 - dldf$prcp2.m1

dldf$edd1000.m12.diff <- (dldf$edd1000.m1 - dldf$edd1000.m2) + (dldf$edd1000.m2 - dldf$edd1000.m3)

## Perform the regressions

source("conley.R")

if (do.origspec) {
    mod.delay.harvest.2w <- felm(logharvest.diff ~ edd1000.p1.diff + edd1000.diff + edd1000.m1.diff + edd1000.m2.diff + edd1000.m3.diff + edd1000.m4.diff + edd1000.m5.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.harvest.con <- ConleySEs(mod.delay.harvest.2w, dldf[-mod.delay.harvest.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
    mod.delay.harvest.full.2w <- felm(logharvest.diff ~ tmin.diff + gdd1000.diff + edd1000.p1.diff + edd1000.diff + edd1000.m1.diff + edd1000.m2.diff + edd1000.m3.diff + edd1000.m4.diff + edd1000.m5.diff + prcp.diff + prcp2.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.harvest.full.con <- ConleySEs(mod.delay.harvest.full.2w, dldf[-mod.delay.harvest.full.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
    mod.delay.harvest.part.2w <- felm(logharvest.diff ~ tmin.diff + gdd1000.diff + edd1000.diff + edd1000.m12.diff + prcp.diff + prcp2.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.harvest.part.con <- ConleySEs(mod.delay.harvest.part.2w, dldf[-mod.delay.harvest.part.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
} else {
    mod.delay.harvest <- felm(logharvest ~ tmin + tmax + gdd1000 + edd1000.p1 + edd1000 + edd1000.m1 + edd1000.m2 + edd1000.m3 + edd1000.m4 + prcp + prcp2 + state:ns(year, df=3) | Munic�pio + year | 0 | Munic�pio + year, data=dldf)
}

maxdelay <- 5
eddinds <- grep("edd", rownames(mod.delay.harvest.full.2w$coeff))
pdf.harvest.full.2w <- data.frame(delay=-1:maxdelay, coeff=mod.delay.harvest.full.2w$coeff[eddinds], cse=mod.delay.harvest.full.2w$cse[eddinds])
pdf.harvest.full.con <- data.frame(delay=-1:maxdelay, coeff=mod.delay.harvest.full.2w$coeff[eddinds], cse=sqrt(diag(mod.delay.harvest.full.con$Spatial_HAC)[eddinds]))

ggplot(pdf.harvest.full.2w, aes(delay, coeff)) +
    geom_line() + geom_ribbon(aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.5)
ggplot(pdf.harvest.full.con, aes(delay, coeff)) +
    geom_line() + geom_ribbon(aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.5)

if (do.origspec) {
    mod.delay.produce.2w <- felm(logproduce.diff ~ edd1000.p1.diff + edd1000.diff + edd1000.m1.diff + edd1000.m2.diff + edd1000.m3.diff + edd1000.m4.diff + edd1000.m5.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.produce.con <- ConleySEs(mod.delay.produce.2w, dldf[-mod.delay.produce.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
    mod.delay.produce.full.2w <- felm(logproduce.diff ~ tmin.diff + gdd1000.diff + edd1000.p1.diff + edd1000.diff + edd1000.m1.diff + edd1000.m2.diff + edd1000.m3.diff + edd1000.m4.diff + edd1000.m5.diff + prcp.diff + prcp2.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.produce.full.con <- ConleySEs(mod.delay.produce.full.2w, dldf[-mod.delay.produce.full.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
    mod.delay.produce.part.2w <- felm(logproduce.diff ~ tmin.diff + gdd1000.diff + edd1000.diff + edd1000.m12.diff + prcp.diff + prcp2.diff + state:ns(year, df=3) | year | 0 | Munic�pio + year, data=dldf, keepCX=T)
    mod.delay.produce.part.con <- ConleySEs(mod.delay.produce.part.2w, dldf[-mod.delay.produce.part.2w$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
} else {
    mod.delay.produce <- felm(logproduce ~ tmin + tmax + gdd1000 + edd1000.p1 + edd1000 + edd1000.m1 + edd1000.m2 + edd1000.m3 + edd1000.m4 + prcp + prcp2 + state:ns(year, df=3) | Munic�pio + year | 0 | Munic�pio + year, data=dldf)
}

eddinds <- grep("edd", rownames(mod.delay.produce.full.2w$coeff))
pdf.produce.full.2w <- data.frame(delay=-1:maxdelay, coeff=mod.delay.produce.full.2w$coeff[eddinds], cse=mod.delay.produce.full.2w$cse[eddinds])
pdf.produce.full.con <- data.frame(delay=-1:maxdelay, coeff=mod.delay.produce.full.2w$coeff[eddinds], cse=sqrt(diag(mod.delay.produce.full.con$Spatial_HAC)[eddinds]))

ggplot(pdf.produce.full.2w, aes(delay, coeff)) +
    geom_line() + geom_ribbon(aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.5)
ggplot(pdf.produce.full.con, aes(delay, coeff)) +
    geom_line() + geom_ribbon(aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.5)

## Make the main figure

ggplot(rbind(cbind(pdf.harvest.full.2w, Metric='Harvest'), cbind(pdf.produce.full.2w, Metric='Production')),
       aes(delay, coeff, colour=Metric, fill=Metric)) +
    facet_wrap(~ Metric) +
    geom_line(size=1) + geom_ribbon(aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.33, size=.2) +
    geom_ribbon(data=rbind(cbind(pdf.harvest.full.con, Metric='Harvest'), cbind(pdf.produce.full.con, Metric='Production')),
                aes(ymin=coeff - 1.96 * cse, ymax=coeff + 1.96 * cse), alpha=.33, size=.2) +
    theme_bw() + scale_x_continuous(expand=c(0, 0)) + xlab("Years prior to harvest (shock in year 0)") +
    ylab("Coefficient on EDD") + geom_hline(yintercept=0)
ggsave("../figures/regs/delays.pdf", width=7.5, height=4.5)

## Produce the regression table

library(stargazer)

stargazer(list(mod.delay.harvest.2w, mod.delay.harvest.full.2w, mod.delay.harvest.part.2w, mod.delay.produce.2w, mod.delay.produce.full.2w, mod.delay.produce.part.2w),
          add.lines=list(c('Year FE', rep('Yes', 6)),
                         c('State Trends', rep('Cubic', 6))),
          dep.var.labels=c("$\\Delta \\log h_t$", "$\\Delta \\log q_t$"),
          covariate.labels=c(paste0("$\\Delta \\text{", weatherlabels[1:2], "}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t+1}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-1}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-2}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-3}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-4}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-5}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-k}$"),
                             paste0("$\\Delta \\text{", gsub("\\^2", "$^2$", weatherlabels[4:5]), "}$")),
          omit=':', df=F, float=F, out="../tables/regs/delays-w2.tex")

## Conley SE
stargazer(list(mod.delay.harvest.2w, mod.delay.harvest.full.2w, mod.delay.harvest.part.2w, mod.delay.produce.2w, mod.delay.produce.full.2w, mod.delay.produce.part.2w),
          add.lines=list(c('Year FE', rep('Yes', 6)),
                         c('State Trends', rep('Cubic', 6))),
          dep.var.labels=c("$\\Delta \\log h_t$", "$\\Delta \\log q_t$"),
          covariate.labels=c(paste0("$\\Delta \\text{", weatherlabels[1:2], "}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t+1}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-1}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-2}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-3}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-4}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-5}$"),
                             paste0("$\\Delta \\text{", weatherlabels[3], "}_{t-k}$"),
                             paste0("$\\Delta \\text{", gsub("\\^2", "$^2$", weatherlabels[4:5]), "}$")),
          omit=':', df=F, float=F, out="../tables/regs/delays-con.tex",
          se=list(sqrt(diag(mod.delay.harvest.con$Spatial_HAC)), sqrt(diag(mod.delay.harvest.full.con$Spatial_HAC)), sqrt(diag(mod.delay.harvest.part.con$Spatial_HAC)), sqrt(diag(mod.delay.produce.con$Spatial_HAC)), sqrt(diag(mod.delay.produce.full.con$Spatial_HAC)), sqrt(diag(mod.delay.produce.part.con$Spatial_HAC))))
